#! /usr/bin/perl
# Copyright (C) 2005  Alex Schroeder <alex@emacswiki.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

use CGI;
use CGI::Carp qw(fatalsToBrowser);
use LWP::UserAgent;
use encoding 'utf8';
use POSIX;

my $q = new CGI;
my $url = $q->param('url');
my $pattern = $q->param('pattern');

if (not $url) {
  print $q->header(),
    $q->start_html('ChangeLog to RSS'),
    $q->h1('ChangeLog to RSS'),
    $q->p('Translates ChangeLog output to RSS 2.0.'),
    $q->p(q{$Id: changelog-to-rss,v 1.17 2005/01/07 13:09:27 as Exp $}),
    $q->start_form(-method=>'GET'),
    $q->p('ChangeLog URL: ',
	  $q->textfield('url', '', 70)),
    $q->p('Link pattern if available, use %s for the filename: ',
	  $q->textfield('pattern', '', 70)),
    $q->p('Limit number of entries returned: ',
	  $q->textfield('limit', '15', 5)),
    $q->p($q->submit()),
    $q->end_form(),
    $q->end_html();
  exit;
}

print $q->header(-type=>'application/rss+xml; charset=UTF-8');
my $rss = qq{<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>ChangeLog</title>
<description>RSS feed automatically extracted from a ChangeLog file.</description>
<link>$url</link>
};

my $ua = new LWP::UserAgent;
my $response = $ua->get($url);
die $response->status_line unless $response->is_success;
my $data = $response->content;

my $limit = $q->param('limit') || 15;
my ($date, $author, $file, $log, $count);
foreach my $line (split(/\n/, $data)) {
  # print "----\n$line\n----\n";
  if ($line =~ m/^(\d\d\d\d-\d\d-\d\d)\s*(.*)/) {
    output($date, $author, $file, $log);
    $date = $1;
    $author = $2;
    $file = '';
    $log = '';
  } elsif ($line =~ m|^\t\* ([a-zA-Z0-9./-]+)(.*)|) {
    last if ++$count > $limit;
    output($date, $author, $file, $log);
    $file = $1;
    $log = $2;
  } else {
    $log .= "\n" . $line;
  }
}

output($date, $author, $file, $log) if $file or $log;
$rss .= q{
</channel>
</rss>
};

print $rss;

sub output {
  my ($date, $author, $file, $log) = @_;
  return unless $file;
  $date = to_date($date);
  $author = quote_html($author);
  $log =~ s|^\t||mg; # strip leading tabs on every line
  $log =~ s|\)\n\(|, |g; # fix weird continuation groups
  # add linebreaks and highlighting for parentheses
  $log =~ s|\((.*?)\):|</span><span class="chunk"><br />(<strong>$1</strong>):|g;
  $log =~ s|^ *<br />||; # strip first linebreak, if there is one
  $log = quote_html($q->span({-class=>"chunk"}, $log));
  my $link = $pattern;
  $link =~ s/\%s/$file/g or $link .= $file;
  $rss .= "<item>\n";
  $rss .= "<author>$author</author>\n" if $author;
  $rss .= "<pubDate>$date</pubDate>\n" if $date;
  $rss .= "<title>$file</title>\n" if $file;
  $rss .= "<link>$link</link>\n" if $link;
  $rss .= "<description>$log</description>\n" if $log;
  $rss .= "</item>\n\n";
}

sub to_date {
  $_ = shift;
  my ($year, $month, $day) = split(/-/);
  # Wed, 02 Oct 2002 00:00:00 GMT
  return strftime("%a, %d %b %Y 00:00:00 GMT",
		  0, 0, 0, $day, $month - 1, $year - 1900);
}

sub quote_html {
  $_ = shift;
  s/&/&amp;/g;
  s/</&lt;/g;
  s/>/&gt;/g;
  s/&amp;([#a-zA-Z0-9]+);/&$1;/g;  # Allow character references
  return $_;
}
